library(geomorph)
library(dplyr)
library(tidyr)
library(ggplot2)
library(effectsize)
library(ggrepel)
library(ggExtra)
The following is the exact supplemental Rmarkdown file from this Journal of Archaeological Science article. It contains a bunch of extra analyses, like a consideration of intra- and interanalyst error.
INTRODUCTION
This Rmarkdown file is organized into six major sections: Introduction, Packages Needed, Validating Centroid Size-Based Body Size Reconstruction, Archaeological Body Size Estimations, Modern Comparisons, and Intra- Interindividual Error Testing. This file corresponds to analyses and figures produced in the manuscript Body Size from Unconventional Specimens. However, it also provides supplemental figures and analyses not presented in the body of the manuscript. All data are directly imported from their most raw formats (housed in corresponding folders in this Supplemental file) so that data manipulation is explicit and analyses are reproducible. The section INTRA- AND INTERINDIVIDUAL ERROR TESTING and the subsection Error Associated with SL to TL Length-Length Conversion are referenced to in the manuscript but statistical analyses and interpretation are presented here.
PACKAGES NEEDED
VALIDATING CENTROID SIZE-BASED BODY SIZE RECONSTRUCTION
<-read.table(
mydata "Validating Centroid Size/Vertebra_Analysis_Centroid.txt", header=TRUE,
row.names=1, stringsAsFactors = FALSE)
<- read.table("Basic Files/Body_Size.txt", header=TRUE)
body.size
<- read.table("Validating Centroid Size/Vertebra_Analysis_Width.txt",
width header=TRUE)
<- read.table("Basic Files/Species.txt", header=TRUE)
species
<-arrayspecs(mydata, ncol(mydata)/3, 3)
a
<- gpagen(a, curves = NULL, surfaces = NULL, PrinAxes = TRUE,
mydata.gpa max.iter = NULL, ProcD = TRUE, Proj = TRUE,
print.progress = FALSE)
<- data.frame(mydata.gpa$Csize)
centroid.df <- tibble::rownames_to_column(centroid.df, "ID")
centroid.df
<- centroid.df %>%
centroid.clean separate("ID", into = c("ID", "Vert_Num")) %>%
merge(body.size, by="ID") %>%
::rename(Csize = mydata.gpa.Csize)
dplyr
<- width %>%
centroid.clean.width separate("ID", into = c("ID", "Vert_Num")) %>%
merge(centroid.clean, by= c("ID", "Vert_Num"))
<- lm(data = centroid.clean.width, SL ~ Width)
lm1
<- lm(data = centroid.clean, SL ~ Csize)
lm2
<- centroid.clean.width %>%
full.dataset mutate(Size.Centroid = (lm2$coefficients[[2]]*Csize)+lm2$coefficients[[1]],
Size.Width = (lm1$coefficients[[2]]*Width)+lm1$coefficients[[1]],
PE.Centroid = ((SL - Size.Centroid)*100)/Size.Centroid,
PE.Width = ((SL - Size.Width)*100)/Size.Width)
<- full.dataset %>%
MPE group_by(ID) %>%
::summarize(MPE.Centroid = mean(PE.Centroid),
dplyrMPE.Width = mean(PE.Width))
Standard Length and Centrum Width
<- ggplot(data = full.dataset, mapping = aes(x = Width, y = SL))
p
+ geom_point(aes(color = ID), alpha = 0.5, size = 3) +
p geom_smooth(formula = y ~ x, method = "lm", size = 1.25,
color = "#4d4d4d") +
theme_classic() +
ylim(166, 500) +
annotate("text", x = Inf, y = 166,
label =
"paste(italic(R^2), \" = .91 \")",
parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
labs(x = "Centrum Width (mm)", y = "Standard Length (mm)") +
theme(legend.position = "none",
axis.line = element_line(color = "#4d4d4d", size = 1),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1))
summary(lm(data = full.dataset, SL ~ Width))
Call:
lm(formula = SL ~ Width, data = full.dataset)
Residuals:
Min 1Q Median 3Q Max
-51.589 -11.046 0.586 10.286 59.150
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 40.4340 5.3646 7.537 1.36e-12 ***
Width 43.8625 0.9596 45.711 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 17.73 on 213 degrees of freedom
Multiple R-squared: 0.9075, Adjusted R-squared: 0.9071
F-statistic: 2090 on 1 and 213 DF, p-value: < 2.2e-16
Standard Length and Centroid Size
<- ggplot(data = full.dataset, mapping = aes(x = Csize, y = SL))
p + geom_point(aes(color = ID), alpha = 0.5, size = 3) +
p geom_smooth(formula = y ~ x, method = "lm", size = 1.25,
color = "#4d4d4d") +
theme_classic() +
ylim(166, 500) +
annotate("text", x = Inf, y = 166,
label =
"paste(italic(R^2), \" = .86 \")",
parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
labs(x = "Centroid Size", y = "Standard Length (mm)") +
theme(legend.position = "none",
axis.line = element_line(color = "#4d4d4d", size = 1),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1))
summary(lm(data = full.dataset, SL ~ Csize))
Call:
lm(formula = SL ~ Csize, data = full.dataset)
Residuals:
Min 1Q Median 3Q Max
-39.810 -14.574 -3.581 12.309 100.548
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 44.3412 6.5309 6.789 1.1e-10 ***
Csize 21.3524 0.5783 36.920 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 21.43 on 213 degrees of freedom
Multiple R-squared: 0.8649, Adjusted R-squared: 0.8642
F-statistic: 1363 on 1 and 213 DF, p-value: < 2.2e-16
Centrum Width Estimation and Centroid Size Estimation
<- ggplot(data = full.dataset, mapping = aes(x = Size.Centroid,
p y = Size.Width))
+ geom_point(aes(color = ID), alpha = 0.5, size = 3) +
p geom_smooth(formula = y ~ x, method = "lm", size = 1.25,
color = "#4d4d4d") +
theme_classic() +
annotate("text", x = Inf, y = 160,
label = "paste(italic(R^2), \" = .94 \")",
parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
labs(x = "Centroid Size Estimated Standard Length (mm)",
y = "Centrum Width Estimated Standard Length (mm)") +
theme(legend.position = "none",
axis.line = element_line(color = "#4d4d4d", size = 1),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 13.5,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 13.5,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1))
summary(lm(data = full.dataset, Size.Width ~ Size.Centroid))
Call:
lm(formula = Size.Width ~ Size.Centroid, data = full.dataset)
Residuals:
Min 1Q Median 3Q Max
-37.659 -7.359 -0.599 6.340 44.890
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.42343 5.03004 0.482 0.63
Size.Centroid 0.99132 0.01768 56.072 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 13.99 on 213 degrees of freedom
Multiple R-squared: 0.9366, Adjusted R-squared: 0.9363
F-statistic: 3144 on 1 and 213 DF, p-value: < 2.2e-16
Predicion Error (PE)
<- ggplot(data = full.dataset,
p mapping = aes(x = PE.Centroid, y = PE.Width))
+ geom_point(aes(color = ID), alpha = 0.5, size = 3) +
p geom_smooth(formula = y ~ x, method = "lm", size = 1.25,
color = "#4d4d4d") +
theme_classic() +
annotate("text", x = Inf, y = -15,
label = "paste(italic(R^2), \" = .56 \")",
parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
labs(x = "PE Centroid Size", y = "PE Centrum Width") +
theme(legend.position="none",
axis.line = element_line(color = "#4d4d4d", size = 1),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1))
summary(lm(data = full.dataset, PE.Width ~ PE.Centroid))
Call:
lm(formula = PE.Width ~ PE.Centroid, data = full.dataset)
Residuals:
Min 1Q Median 3Q Max
-8.8205 -2.8283 -0.0026 2.1242 12.3219
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.07220 0.28069 -0.257 0.797
PE.Centroid 0.60946 0.03733 16.326 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 4.116 on 213 degrees of freedom
Multiple R-squared: 0.5558, Adjusted R-squared: 0.5537
F-statistic: 266.5 on 1 and 213 DF, p-value: < 2.2e-16
Predicted SL w/ Centroid Size ~ Actual SL Centroid Size underpredits values
%>%
full.dataset ggplot(mapping = aes(x = SL, y = Size.Centroid)) +
geom_point(aes(color = ID), alpha = 0.5, size = 3) +
geom_smooth(formula = y ~ x, method = "lm", size = 1.25,
color = "#4d4d4d") +
theme_classic() +
labs(x = "Actual SL", y = "Predicted SL w/ Centroid Size") +
annotate("text", x = Inf, y = 175,
label = "paste(italic(R^2), \" = .86 \")",
parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
theme(legend.position="none",
axis.line = element_line(color = "#4d4d4d", size = 1),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1))
summary(lm(data = full.dataset, Size.Centroid ~ SL))
Call:
lm(formula = Size.Centroid ~ SL, data = full.dataset)
Residuals:
Min 1Q Median 3Q Max
-76.13 -10.50 1.93 11.25 45.98
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 37.75302 6.68342 5.649 5.14e-08 ***
SL 0.86485 0.02343 36.920 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 19.93 on 213 degrees of freedom
Multiple R-squared: 0.8649, Adjusted R-squared: 0.8642
F-statistic: 1363 on 1 and 213 DF, p-value: < 2.2e-16
Predicted SL w/ Centrum Width ~ Actual SL
%>%
full.dataset ggplot(mapping = aes(x = SL, y = Size.Width)) +
geom_point(aes(color = ID), alpha = 0.5, size = 3) +
geom_smooth(formula = y ~ x, method = "lm", size = 1.25,
color = "#4d4d4d") +
theme_classic() +
labs(x = "Actual SL", y = "Predicted SL w/ Centrum Width") +
annotate("text", x = Inf, y = 175,
label = "paste(italic(R^2), \" = .91 \")",
parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
theme(legend.position="none",
axis.line = element_line(color = "#4d4d4d", size = 1),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1))
summary(lm(data = full.dataset, Size.Width ~ SL))
Call:
lm(formula = Size.Width ~ SL, data = full.dataset)
Residuals:
Min 1Q Median 3Q Max
-52.495 -10.535 -1.994 10.281 55.812
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 25.84186 5.66415 4.562 8.54e-06 ***
SL 0.90749 0.01985 45.711 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 16.89 on 213 degrees of freedom
Multiple R-squared: 0.9075, Adjusted R-squared: 0.9071
F-statistic: 2090 on 1 and 213 DF, p-value: < 2.2e-16
%>%
full.dataset select(PE.Centroid, PE.Width) %>%
gather(key = "PE", value = "value") %>%
ggplot(aes(x = value, color = PE, fill = PE)) +
geom_density(size = 1.5, alpha = 0.5) +
annotate(geom = "text", label = "Centroid Size", x = -5.5, y = .05, hjust = 1,
vjust = 0.5, color = "#f8766d", fontface = "bold") +
annotate(geom = "text", label = "Width", x = 4, y = .06, hjust = 0,
vjust = 0.5, color = "#00bfc4", fontface = "bold") +
theme_classic() +
theme(legend.position = "none",
axis.line = element_line(color = "#4d4d4d", size = 1),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
geom_vline(xintercept = 0, linetype = 2, color = "#4d4d4d", size = 1) +
labs(x = "Prediction Error", y = "Density") +
scale_y_continuous(expand = c(0, 0)) +
scale_x_continuous(expand = c(0, 0))
t.test(full.dataset$PE.Centroid, full.dataset$PE.Width, var.equal = TRUE)
Two Sample t-test
data: full.dataset$PE.Centroid and full.dataset$PE.Width
t = 0.12797, df = 428, p-value = 0.8982
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.219891 1.389806
sample estimates:
mean of x mean of y
0.03266882 -0.05228858
<- ggplot(data = full.dataset,
p mapping = aes(x = PE.Centroid, y = PE.Width)) +
geom_point(aes(color = ID), alpha = 0.5, size = 3) +
geom_smooth(formula = y ~ x, method = "lm", size = 1.25,
color = "#4d4d4d") +
theme_classic() +
annotate("text", x = Inf, y = -15,
label = "paste(italic(R^2), \" = .56 \")",
parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
labs(x = "PE Centroid Size", y = "PE Centrum Width") +
theme(legend.position="none",
axis.line = element_line(color = "#4d4d4d", size = 1),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1))
ggMarginal(p, xparams = list(color = "#f8766d", fill = "#f8766d", alpha = 0.5, size = 1.25),
yparams = list(color = "#00bfc4", fill = "#00bfc4", alpha = 0.5, size = 1.25))
Mean Prediction Error (MPE)
<- ggplot(data = MPE, mapping = aes(x = MPE.Centroid, y = MPE.Width))
p + geom_point(aes(color = ID), alpha = 0.5, size = 3) +
p geom_smooth(formula = y ~ x, method = "lm", size = 1.25,
color = "#4d4d4d") +
theme_classic() +
annotate("text", x = Inf, y = -15,
label = "paste(italic(R^2), \" = .63 \")",
parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
labs(x = "MPE Centroid Size", y = "MPE Centrum Width") +
theme(legend.position="none",
axis.line = element_line(color = "#4d4d4d", size = 1),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1))
summary(lm(data = MPE, MPE.Width ~ MPE.Centroid))
Call:
lm(formula = MPE.Width ~ MPE.Centroid, data = MPE)
Residuals:
Min 1Q Median 3Q Max
-4.7726 -2.4536 -0.7976 2.1232 6.0297
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.05754 0.82072 -0.070 0.945031
MPE.Centroid 0.91819 0.18205 5.044 0.000146 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 3.38 on 15 degrees of freedom
Multiple R-squared: 0.6291, Adjusted R-squared: 0.6043
F-statistic: 25.44 on 1 and 15 DF, p-value: 0.0001456
ARCHAEOLOGICAL BODY SIZE ESTIMATIONS
setwd("Archaeological Estimates")
<- list.files(pattern = "\\.txt$")
files <- data.frame()
results
for (i in seq_along(files)) {
<- paste(files[i], sep="/")
fname
<- read.table(fname, header = T, row.names = 1,
data stringsAsFactors = FALSE)
<-arrayspecs(data, ncol(data)/3, 3)
a
<- gpagen(a, curves = NULL, surfaces = NULL, PrinAxes = TRUE,
mydata.gpa max.iter = NULL,
ProcD = TRUE, Proj = TRUE, print.progress = FALSE)
<- data.frame(mydata.gpa$Csize)
centroid.df <- tibble::rownames_to_column(centroid.df, "ID")
centroid.df
<- centroid.df %>%
centroid.clean separate("ID", into = "ID") %>%
merge(body.size, by="ID") %>%
::rename(Csize = mydata.gpa.Csize)
dplyr
<- summary(lm(data = centroid.clean, SL ~ Csize))
fit1
<- cor.test(centroid.clean$SL, centroid.clean$Csize,
fit2 method = "spearman")
<- (fit1$coefficients[[2]]*mydata.gpa$Csize[[1]])+
Arch_Size $coefficients[[1]]
fit1
1] <- fit1$coefficients[2]
results[i,2] <- mydata.gpa$Csize[[1]]
results[i,3] <- fit1$coefficients[1]
results[i,4] <- fit1$r.squared
results[i,5] <- (fit2$estimate)^2
results[i,6] <- Arch_Size
results[i,
}
rownames(results) <- sub(".txt", "", files)
colnames(results) <- c("Slope", "Csize", "Intercept", "R2", "Rho2",
"Arch_SL")
round(results, digits = 2)
Slope Csize Intercept R2 Rho2 Arch_SL
1304_BS 33.31 10.89 -21.54 0.90 0.89 341.36
1554_UV1 18.39 19.17 -38.22 0.81 0.61 314.26
1567_HYO 51.16 8.21 63.92 0.79 0.61 484.12
2005.27.142_HYO_UI30 51.16 5.26 63.92 0.79 0.61 332.85
2005.27.152_CV1_UI32 24.36 11.56 45.47 0.86 0.83 327.14
2005.27.152_CV2_UI32 43.17 6.60 48.47 0.87 0.85 333.31
2005.27.152_HYO_UI33 51.16 8.97 63.92 0.79 0.61 522.60
2005.27.157_UR_UI24 22.55 19.67 44.57 0.92 0.78 488.07
2005.27.158_CV_UI26 21.35 15.74 44.34 0.86 0.83 380.45
2005.27.161_UV_UI25 76.16 3.69 -1.53 0.92 0.88 279.78
2005.27.165_CV_UI29 23.17 16.89 44.46 0.87 0.84 435.78
2005.27.168_OPC_UI42 49.87 7.08 86.66 0.69 0.52 439.77
2005.27.169_CV1_U31 43.17 8.96 48.47 0.87 0.85 435.30
2005.27.169_CV2_U31 43.17 8.90 48.47 0.87 0.85 432.67
2005.27.169_CV3_U31 43.17 6.36 48.47 0.87 0.85 323.04
2005.27.169_CV4_U31 43.17 6.28 48.47 0.87 0.85 319.70
2005.27.173_CV1_U40 43.17 8.00 48.47 0.87 0.85 393.66
2005.27.173_CV2_U40 43.17 7.46 48.47 0.87 0.85 370.38
2005.27.331_CV_UI46 43.17 7.54 48.47 0.87 0.85 373.87
2005.27.331_UV_UI46 59.44 7.65 8.80 0.95 0.95 463.64
2005.27.458_CV_UI20 49.18 8.86 44.85 0.83 0.76 480.79
2007.46.1098_CV_UI3 43.17 9.73 48.47 0.87 0.85 468.56
2007.46.1100_SUB 15.11 32.93 -60.75 0.87 0.91 437.01
2007.46.2161_UI16_1 43.17 9.63 48.47 0.87 0.85 464.34
2007.46.2161_UI16_10 43.17 7.79 48.47 0.87 0.85 384.60
2007.46.2161_UI16_11 43.17 7.39 48.47 0.87 0.85 367.40
2007.46.2161_UI16_12 43.17 5.85 48.47 0.87 0.85 300.82
2007.46.2161_UI16_13 43.17 3.30 48.47 0.87 0.85 191.04
2007.46.2161_UI16_2 43.17 9.65 48.47 0.87 0.85 464.91
2007.46.2161_UI16_3 43.17 9.96 48.47 0.87 0.85 478.27
2007.46.2161_UI16_4 43.17 9.46 48.47 0.87 0.85 456.80
2007.46.2161_UI16_5 43.17 9.20 48.47 0.87 0.85 445.48
2007.46.2161_UI16_6 43.17 9.51 48.47 0.87 0.85 459.00
2007.46.2161_UI16_7 43.17 8.92 48.47 0.87 0.85 433.40
2007.46.2161_UI16_8 43.17 8.48 48.47 0.87 0.85 414.56
2007.46.2161_UI16_9 43.17 8.17 48.47 0.87 0.85 401.04
2007.46.2207_CV_UI15 43.17 10.66 48.47 0.87 0.85 508.51
2007.46.3104_BS_UI62 33.31 12.62 -21.54 0.90 0.89 398.77
2007.46.3442_CV_UI61 43.17 11.28 48.47 0.87 0.85 535.31
2007.46.4008_CT_UI53 24.24 20.57 -9.90 0.87 0.77 488.75
2007.46.4164_SUB_UI78 22.16 22.47 3.66 0.84 0.65 501.69
205.27.167_CV_UI43 52.22 9.23 48.15 0.90 0.89 530.09
205.27.167_HYO_UI43 18.17 12.57 84.31 0.70 0.46 312.58
523_MX 35.90 9.69 84.54 0.83 0.64 432.34
90.20.1199_BS_UI19 33.31 14.46 -21.54 0.90 0.89 460.17
99.20.1111_CT_UI68 26.86 8.10 74.48 0.54 0.44 291.98
99.20.1150_PT_UI58 26.88 11.55 15.55 0.99 0.98 325.84
99.20.160_UV_UI74 59.44 9.14 8.80 0.95 0.95 552.24
99.20.363_UV_UI73 18.39 21.42 -38.22 0.81 0.61 355.58
99.20.9_QUA_UI70 13.02 20.47 58.55 0.76 0.67 325.10
99.22.1765_CV_UI71 21.35 18.09 44.34 0.86 0.83 430.52
99.22.2640_OPC_UI72 49.87 8.04 86.66 0.69 0.52 487.53
99.22.3136_CV_UI8 21.35 18.37 44.34 0.86 0.83 436.54
99.22.882_OPC_UI11 49.87 7.36 86.66 0.69 0.52 453.59
BK.70.71_CT 26.86 8.11 74.48 0.54 0.44 292.29
BK.70.71_CV 43.17 6.43 48.47 0.87 0.85 326.11
BK.70.71_HYO 51.16 5.13 63.92 0.79 0.61 326.53
BK.70.71_MET 23.02 14.09 6.88 0.93 0.76 331.21
BK.70.71_UV 59.44 5.44 8.80 0.95 0.95 331.89
FN51_ENT 38.94 7.15 117.45 0.67 0.38 395.94
Visualize Archaeological Distribution
%>%
results ggplot(aes(Arch_SL)) +
geom_density(fill = "#00bfc4", color = "#00bfc4", bw = 50, alpha = 0.5,
size = 1.5) +
theme_classic() +
theme(legend.position="none",
axis.line = element_line(color = "#4d4d4d", size = 1),
plot.title = element_text(color = "#4d4d4d", size = 16,
face = "bold"),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
labs(title = "Archaeological Size Distribution",
x = "Standard Length (mm)", y = "Density") +
xlim(0, 700)
MODERN COMPARISONS
Total Length (TL)
A length-length conversion factor from Standard Length (SL) to Total Length (TL) was applied to the archaeological SL estimates. All modern comparison data uses TL. TL could not be estimated per archaeological specimen considering that two specimens from the Museum of Southwestern Biology comparative library (25273, 50002, and 50003) do not have TL measurements. A SL to TL conversion factor of 1.27 was chosen by calculating the mean values available for Ictiobus bubalus and Carpiodes carpio on fishbase.de. Available here and here.
# convert archaeological SL to TL
<- mutate(results, Arch_TL = Arch_SL*1.27)
results
# visualize archaeological distribution
%>%
results ggplot(aes(Arch_TL)) +
geom_density(fill = "#00bfc4", color = "#00bfc4", bw = 50, alpha = 0.5,
size = 1.5) +
theme_classic() +
theme(legend.position="none",
axis.line = element_line(color = "#4d4d4d", size = 1),
plot.title = element_text(color = "#4d4d4d", size = 16,
face = "bold"),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
labs(title = "Archaeological Size Distribution",
x = "Total Length (mm)", y = "Density") +
xlim(100, 900)
Error Associated with SL to TL Length-Length Conversion
TL
and TL_estimate
of specimens from the comparative library are almost perfectly correlated (R2 = 0.99; rho = 1). This means that error associated with the TL conversion factor (1.27) is extremely low. Further, the conversion factor will underestimate TL if there is error. This can be seen by visually inspecting the graph below.
<- body.size %>%
body.size.estimate na.omit() %>%
mutate(TL_estimate = SL * 1.27)
<- ggplot(data = body.size.estimate,
p mapping = aes(x = TL_estimate, y = TL))
+ geom_point(alpha = 0.5, size = 4) +
p geom_smooth(formula = y ~ x, method = "lm", size = 1.25,
color = "#4d4d4d") +
annotate("text", x = 400, y = 215,
label = "paste(italic(R) ^ 2, \" = .99, \", italic(rho),
\" = 1 \")",
parse = T, hjust = 1, vjust = 0, color = "#4d4d4d") +
theme_classic() +
labs(x = "Estimated Total Length (mm)", y = "Actual Total Length (mm)") +
theme(legend.position="none",
axis.line = element_line(color = "#4d4d4d", size = 1),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1))
summary(lm(data = body.size.estimate, TL ~ TL_estimate))
Call:
lm(formula = TL ~ TL_estimate, data = body.size.estimate)
Residuals:
Min 1Q Median 3Q Max
-4.7035 -2.6240 -0.6684 1.8637 10.3026
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 18.76522 7.36499 2.548 0.0256 *
TL_estimate 0.97281 0.02235 43.517 1.41e-14 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 3.968 on 12 degrees of freedom
Multiple R-squared: 0.9937, Adjusted R-squared: 0.9932
F-statistic: 1894 on 1 and 12 DF, p-value: 1.41e-14
<-cor.test(body.size.estimate$TL, body.size.estimate$TL_estimate,
rho method = "spearman")
rho
Spearman's rank correlation rho
data: body.size.estimate$TL and body.size.estimate$TL_estimate
S = 0, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
1
Calculating Modern Comparison
Archaeological Estimates
# specify breaks
<- c(-Inf,200,250,300,350,400,450,500,550,Inf)
breaks
# specify bin labels
<- c("-199", "200-249", "250-299", "300-349", "350-399","400-449",
tags "450-499","500-549", "550+")
# put values into bins
<- cut(results$Arch_TL,
group_tags breaks=breaks,
include.lowest=TRUE,
right=FALSE,
labels=tags)
# plot
<- as_tibble(summary(group_tags), rownames = "bins") %>%
a ::rename(count = value) %>%
dplyrmutate(percent = (count/sum(count)*100)) %>%
::select(bins, percent) %>%
dplyrmutate(time = "Archaeological")
Moody (1970)
# overall percentages (1967-1970) reported in Table 4
<- tibble(bins = c("-199", "200-249", "250-299", "300-349", "350-399",
b "400-449", "450-499","500-549", "550+"),
percent = c(0, 2, 3, 11, 15, 21, 36, 11, 1)) %>%
mutate(time = "Commercial")
NM Game and Fish
<- read.table("Modern Comparison/NM Game and Fish.txt",
NMgamefish header = TRUE)
# specify breaks
<- c(-Inf, 200, 250, 300, 350, 400, 450, 500, 550, Inf)
breaks # specify bin labels
<- c("-199", "200-249", "250-299", "300-349", "350-399","400-449",
tags "450-499","500-549", "550+")
# puttingvalues into bins
<- cut(NMgamefish$TL,
group_tags breaks=breaks,
include.lowest=TRUE,
right=FALSE,
labels=tags)
# plot
<- as_tibble(summary(group_tags), rownames = "bins") %>%
c ::rename(count = value) %>%
dplyrmutate(percent = (count/sum(count)*100)) %>%
::select(bins, percent) %>%
dplyrmutate(time = "Non_Commercial")
Bind Together and Plot
<- rbind(a, b, c)
d
$time <- factor(d$time, levels = c("Commercial", "Non_Commercial",
d"Archaeological"))
$time2 <- factor(d$time,
dlabels = c("Commercial Fishery (1967–1970)",
"Non-Commercial Fishery (2011–2017)",
"Archaeological Fishery (ca. AD 1300-1600)"))
ggplot(d, aes(x = bins, y = percent)) +
geom_bar(stat = "identity", size = 1.5) +
facet_wrap(~ time2, nrow = 3) +
scale_y_continuous(breaks = seq(0, 45, by = 15)) +
theme_classic() +
theme(legend.position="top",
strip.text.x = element_text(color = "#4d4d4d", size = 12,
face = "bold"),
strip.background = element_rect(color= NA, fill= NA),
legend.title = element_blank(),
axis.line = element_line(color = "#4d4d4d", size = 1),
axis.text.x = element_text(color = "#4d4d4d", size = 7),
axis.text.y = element_text(color = "#4d4d4d", size = 8),
axis.title.x = element_text(color = "#4d4d4d", size = 10,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 10,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
labs(x = "Total Length (mm)", y = "Percent (%)")
INTRA- AND INTERINDIVIDUAL ERROR TESTING
Intraobserver Error
Analyst 1 (Alexandra Harris)
Run a Generalized Procrustes Analysis for all Analyst 1 datafiles. Each .txt file pertains to a specimen and contains five replicate landmark configurations
setwd("Error Testing/Alex")
<- list.files(pattern = "\\.txt$")
files <- list()
my.list
for (i in seq_along(files)) {
<- paste(files[i], sep="/")
fname
<- read.table(fname, header = T, row.names = 1,
data stringsAsFactors = FALSE)
<-arrayspecs(data, ncol(data)/3, 3)
a
<- gpagen(a, curves = NULL, surfaces = NULL, PrinAxes = TRUE,
mydata.gpa max.iter = NULL,
ProcD = TRUE, Proj = TRUE, print.progress = FALSE)
<- mydata.gpa
my.list[[i]] }
Establish number of rows in each landmark configuration
<- rep(NA, 60)
rows for(i in seq_along(my.list)){
<- dim(my.list[[i]][["coords"]])[1]
rows[i] }
Create lists out of all coordinates per replicate per specimen and all consensuses per specimen
# initiate
<- list()
coords for(i in 1:60){
<- array(NA, dim = c(rows[i], 3, 5))
coords[[i]]
}
# isolate coordinates per specimen per analyst per replicate
for(i in seq_along(my.list)){
for(j in 1:5){
<- my.list[[i]][["coords"]][,,j]
coords[[i]][,,j]
}
}
# initiate
<- list()
consensus
# isolate consensus per specimen
for(i in seq_along(my.list)){
<- my.list[[c(i, 4)]]
consensus[[i]] }
Calculate procd
(procd
= total Procrustes distance from consensus)
# initiate
<- list()
output1 for(i in 1:60){
<- array(NA, dim = c(rows[i], 3, 5))
output1[[i]]
}
# subtract and square
for(i in seq_along(coords)){
for(j in 1:5){
<- (coords[[i]][,,j] - consensus[[i]])^2
output1[[i]][,,j]
}
}
# initiate
<- list()
output2 for(i in 1:60){
<- array(NA, dim = c(1, rows[i], 5))
output2[[i]]
}
# sum rows
for(i in seq_along(output1)){
for(j in 1:5){
<- rowSums(output1[[i]][,,j])
output2[[i]][,,j]
}
}
# initiate
<- list()
procd for(i in 1:60){
<- array(NA, dim = c(1, 1, 5))
procd[[i]]
}
# sum and square root
for(i in 1:60){
for(j in 1:5){
<- sqrt(sum(output2[[i]][,,j]))
procd[[i]][,,j]
} }
Transform procd
and assign to Analyst 1
# create dataframe
<- data.frame(unlist(procd))
procd
#subset data for Analyst 1 and transform
<- procd %>%
analyst1.procd mutate(analyst = "Analyst 1",
replicate = rep(c("1", "2", "3", "4", "5"), times = 60)) %>%
rename(procd = colnames(procd)[1])
Analyst 2 (Jonathan Dombrosky)
Run a Generalized Procrustes Analysis for all Analyst 2 datafiles. Each .txt file pertains to a specimen and contains five replicate landmark configurations
setwd("Error Testing/Jon")
<- list.files(pattern = "\\.txt$")
files <- list()
my.list
for (i in seq_along(files)) {
<- paste(files[i], sep="/")
fname
<- read.table(fname, header = T, row.names = 1,
data stringsAsFactors = FALSE)
<-arrayspecs(data, ncol(data)/3, 3)
a
<- gpagen(a, curves = NULL, surfaces = NULL, PrinAxes = TRUE,
mydata.gpa max.iter = NULL,
ProcD = TRUE, Proj = TRUE, print.progress = FALSE)
<- mydata.gpa
my.list[[i]] }
Establish number of rows in each landmark configuration
<- rep(NA, 60)
rows for(i in seq_along(my.list)){
<- dim(my.list[[i]][["coords"]])[1]
rows[i] }
Create lists out of all coordinates per replicate per specimen and all consensuses per specimen
# initiate
<- list()
coords for(i in 1:60){
<- array(NA, dim = c(rows[i], 3, 5))
coords[[i]]
}
# isolate coordinates per specimen per analyst per replicate
for(i in seq_along(my.list)){
for(j in 1:5){
<- my.list[[i]][["coords"]][,,j]
coords[[i]][,,j]
}
}
# initiate
<- list()
consensus
# isolate consensus per specimen
for(i in seq_along(my.list)){
<- my.list[[c(i, 4)]]
consensus[[i]] }
Calculate procd
(procd
= total Procrustes distance from consensus)
# initiate
<- list()
output1 for(i in 1:60){
<- array(NA, dim = c(rows[i], 3, 5))
output1[[i]]
}
# subtract and square
for(i in seq_along(coords)){
for(j in 1:5){
<- (coords[[i]][,,j] - consensus[[i]])^2
output1[[i]][,,j]
}
}
# initiate
<- list()
output2 for(i in 1:60){
<- array(NA, dim = c(1, rows[i], 5))
output2[[i]]
}
# sum rows
for(i in seq_along(output1)){
for(j in 1:5){
<- rowSums(output1[[i]][,,j])
output2[[i]][,,j]
}
}
# initiate
<- list()
procd for(i in 1:60){
<- array(NA, dim = c(1, 1, 5))
procd[[i]]
}
# sum and square root
for(i in 1:60){
for(j in 1:5){
<- sqrt(sum(output2[[i]][,,j]))
procd[[i]][,,j]
} }
Transform procd
and assign to Analyst 2
# create dataframe
<- data.frame(unlist(procd))
procd
#subset data for Analyst 1 and transform
<- procd %>%
analyst2.procd mutate(analyst = "Analyst 2",
replicate = rep(c("1", "2", "3", "4", "5"), times = 60)) %>%
rename(procd = colnames(procd)[1])
Bind Analyst 1 and Analyst 2 datasets
<- rbind(analyst1.procd, analyst2.procd) procd
Visualize and Significance Testing
One way ANOVA tests indicate that the mean values of procd
are equal between replicates for Analyst 1 (p = 0.91) and Analyst 2 (p = 0.31). Further, the effect size between replicates is extremely small for Analyst 1 (Eta2 < 0.01) and Analyst 2 (Eta2 = 0.02). The landmarking configurations on the entire archaeological dataset are practically indistinguishable between replicates of the same analyst.
%>%
procd ggplot(mapping = aes(x = replicate, y = procd, group = replicate,
fill = replicate, color = replicate)) +
geom_boxplot(size = 0.75, alpha = 0.5, outlier.alpha = 0.5,
outlier.size = 2.5) +
facet_wrap(~ analyst, nrow = 2) +
theme_classic() +
theme(legend.position= "none",
strip.background = element_blank(),
strip.text.x = element_text(color = "#4d4d4d", size = 16,
face = "bold"),
axis.line = element_line(color = "#4d4d4d", size = 1),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
labs(x = "Replicate", y = "Total Procrustes Distance\nfrom Consensus")
<- aov(procd ~ replicate, data = analyst1.procd)
oneway.analyst1 summary(oneway.analyst1)
Df Sum Sq Mean Sq F value Pr(>F)
replicate 4 0.00096 0.0002397 0.254 0.907
Residuals 295 0.27886 0.0009453
eta_squared(oneway.analyst1)
For one-way between subjects designs, partial eta squared is equivalent
to eta squared. Returning eta squared.
# Effect Size for ANOVA
Parameter | Eta2 | 95% CI
-----------------------------------
replicate | 3.43e-03 | [0.00, 1.00]
- One-sided CIs: upper bound fixed at [1.00].
<- aov(procd ~ replicate, data = analyst2.procd)
oneway.analyst2 summary(oneway.analyst2)
Df Sum Sq Mean Sq F value Pr(>F)
replicate 4 0.0188 0.004707 1.193 0.314
Residuals 295 1.1636 0.003944
eta_squared(oneway.analyst2)
For one-way between subjects designs, partial eta squared is equivalent
to eta squared. Returning eta squared.
# Effect Size for ANOVA
Parameter | Eta2 | 95% CI
-------------------------------
replicate | 0.02 | [0.00, 1.00]
- One-sided CIs: upper bound fixed at [1.00].
Swarmplot
library(ggbeeswarm)
%>%
procd ggplot(mapping = aes(x = replicate, y = procd, group = replicate,
fill = replicate, color = replicate)) +
geom_beeswarm(size = 4, cex = 0.8, alpha = 0.2) +
geom_beeswarm(size = 4, cex = 0.8, shape = 21, fill = NA) +
facet_wrap(~ analyst, nrow = 2) +
theme_classic() +
theme(legend.position= "none",
strip.background = element_blank(),
strip.text.x = element_text(color = "#4d4d4d", size = 16,
face = "bold"),
axis.line = element_line(color = "#4d4d4d", size = 1),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
labs(x = "Replicate", y = "Total Procrustes Distance from Consensus")
Interobserver Error
Run a Generalized Procrustes Analysis for all datafiles (both Analyst 1 and Analyst 2 combined). Each .txt file pertains to a specimen and contains five replicate landmark configurations per analyst.
setwd("Error Testing/Both")
<- list.files(pattern = "\\.txt$")
files <- list()
my.list
for (i in seq_along(files)) {
<- paste(files[i], sep="/")
fname
<- read.table(fname, header = T, row.names = 1,
data stringsAsFactors = FALSE)
<-arrayspecs(data, ncol(data)/3, 3)
a
<- gpagen(a, curves = NULL, surfaces = NULL, PrinAxes = TRUE,
mydata.gpa max.iter = NULL,
ProcD = TRUE, Proj = TRUE, print.progress = FALSE)
<- mydata.gpa
my.list[[i]] }
Establish number of rows in each landmark configuration
<- rep(NA, 60)
rows for(i in seq_along(my.list)){
<- dim(my.list[[i]][["coords"]])[1]
rows[i] }
Create lists out of all coordinates per replicate per analyst per specimen and all consensuses per specimen
# initiate
<- list()
coords for(i in 1:60){
<- array(NA, dim = c(rows[i], 3, 10))
coords[[i]]
}
# isolate coordinates per specimen per analyst per replicate
for(i in seq_along(my.list)){
for(j in 1:10){
<- my.list[[i]][["coords"]][,,j]
coords[[i]][,,j]
}
}
# initiate
<- list()
consensus
# isolate consensus per specimen
for(i in seq_along(my.list)){
<- my.list[[c(i, 4)]]
consensus[[i]] }
Calculate procd
(procd
= total Procrustes distance from consensus)
# initiate
<- list()
output1 for(i in 1:60){
<- array(NA, dim = c(rows[i], 3, 10))
output1[[i]]
}
# subtract and square
for(i in seq_along(coords)){
for(j in 1:10){
<- (coords[[i]][,,j] - consensus[[i]])^2
output1[[i]][,,j]
}
}
# initiate
<- list()
output2 for(i in 1:60){
<- array(NA, dim = c(1, rows[i], 10))
output2[[i]]
}
# sum rows
for(i in seq_along(output1)){
for(j in 1:10){
<- rowSums(output1[[i]][,,j])
output2[[i]][,,j]
}
}
# initiate
<- list()
procd for(i in 1:60){
<- array(NA, dim = c(1, 1, 10))
procd[[i]]
}
# sum and square root
for(i in 1:60){
for(j in 1:10){
<- sqrt(sum(output2[[i]][,,j]))
procd[[i]][,,j]
} }
Transform procd
# create dataframe
<- data.frame(unlist(procd))
procd
#subset data for Analyst 1 and transform
<- data.frame(procd[c(rep(TRUE, 5), rep(FALSE, 5)),])
analyst1.procd
<- analyst1.procd %>%
analyst1.procd mutate(analyst = "1") %>%
rename(procd = colnames(analyst1.procd)[1])
#subset data for Analyst 2 and transform
<- data.frame(procd[c(rep(FALSE, 5), rep(TRUE, 5)),])
analyst2.procd
<- analyst2.procd %>%
analyst2.procd mutate(analyst = "2") %>%
rename(procd = colnames(analyst2.procd)[1])
#bind transformed datasets for Analyst 1 and 2
<- rbind(analyst1.procd, analyst2.procd) procd
Visualize and Significance Testing
An independent t-test indicates that the mean values of procd
between Analyst 1 and Analyst 2 are equal (p = 0.89). Further, the effect size between the two means is extremely small (Cohen’s d = 0.01). The landmarking configuration on the entire archaeological dataset (replicated five times) is practically indistinguishable between Analyst 1 and Analyst 2.
%>%
procd ggplot(mapping = aes(x = analyst, y = procd, group = analyst,
fill = analyst, color = analyst)) +
geom_boxplot(size = 0.75, alpha = 0.5, outlier.alpha = 0.5,
outlier.size = 2.5) +
theme_classic() +
theme(legend.position= "none",
strip.background = element_blank(),
strip.text.x = element_text(color = "#4d4d4d", size = 16,
face = "bold"),
axis.line = element_line(color = "#4d4d4d", size = 1),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
labs(x = "Analyst", y = "Total Procrustes Distance\n from Consensus")
<- t.test(analyst1.procd$procd, analyst2.procd$procd)
t.test t.test
Welch Two Sample t-test
data: analyst1.procd$procd and analyst2.procd$procd
t = -0.13727, df = 570.74, p-value = 0.8909
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.01440174 0.01252025
sample estimates:
mean of x mean of y
0.05045055 0.05139130
cohens_d(t.test)
Cohen's d | 95% CI
-------------------------
-0.01 | [-0.17, 0.15]
- Estimated using un-pooled SD.
Swarmplot
%>%
procd ggplot(mapping = aes(x = analyst, y = procd, group = analyst,
fill = analyst, color = analyst)) +
geom_beeswarm(size = 4, cex = 0.8, alpha = 0.2) +
geom_beeswarm(size = 4, cex = 0.8, shape = 21, fill = NA) +
theme_classic() +
theme(legend.position= "none",
strip.background = element_blank(),
strip.text.x = element_text(color = "#4d4d4d", size = 16,
face = "bold"),
axis.line = element_line(color = "#4d4d4d", size = 1),
axis.text.x = element_text(color = "#4d4d4d", size = 12),
axis.text.y = element_text(color = "#4d4d4d", size = 12),
axis.title.x = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.title.y = element_text(color = "#4d4d4d", size = 14,
face = "bold"),
axis.ticks.x = element_line(color = "#4d4d4d", size = 1),
axis.ticks.y = element_line(color = "#4d4d4d", size = 1)) +
labs(x = "Analyst", y = "Total Procrustes Distance\n from Consensus")